R Markdown

Exercise 1

Using NGPhylogeny.fr, I created this phylogenetic tree from the DNA sequences that I took from our Moodle site.

# Embed the saved tree image in the rendered report.
library(knitr)
include_graphics("Image tree Genomics png.png")

# One-time setup: install BiocManager and ggtree only when they are missing,
# so that re-knitting the report does not reinstall packages (or require
# network access) on every run.
if (!requireNamespace("BiocManager", quietly = TRUE)) {
  install.packages("BiocManager")
}
if (!requireNamespace("ggtree", quietly = TRUE)) {
  # Keep the Bioconductor release pinned to the one used for this analysis.
  BiocManager::install("ggtree", version = "3.11")
}
library(tidyverse)
## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.0 ──
## ✓ ggplot2 3.3.2     ✓ purrr   0.3.4
## ✓ tibble  3.0.4     ✓ dplyr   1.0.2
## ✓ tidyr   1.1.2     ✓ stringr 1.4.0
## ✓ readr   1.4.0     ✓ forcats 0.5.0
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## x dplyr::filter() masks stats::filter()
## x dplyr::lag()    masks stats::lag()
library(ggtree)
## Registered S3 method overwritten by 'treeio':
##   method     from
##   root.phylo ape
## ggtree v2.2.4  For help: https://yulab-smu.github.io/treedata-book/
## 
## If you use ggtree in published research, please cite the most appropriate paper(s):
## 
## - Guangchuang Yu. Using ggtree to visualize data on tree-like structures. Current Protocols in Bioinformatics, 2020, 69:e96. doi:10.1002/cpbi.96
## - Guangchuang Yu, Tommy Tsan-Yuk Lam, Huachen Zhu, Yi Guan. Two methods for mapping and visualizing associated data on phylogeny using ggtree. Molecular Biology and Evolution 2018, 35(12):3041-3043. doi:10.1093/molbev/msy194
## - Guangchuang Yu, David Smith, Huachen Zhu, Yi Guan, Tommy Tsan-Yuk Lam. ggtree: an R package for visualization and annotation of phylogenetic trees with their covariates and other associated data. Methods in Ecology and Evolution 2017, 8(1):28-36. doi:10.1111/2041-210X.12628
## 
## Attaching package: 'ggtree'
## The following object is masked from 'package:tidyr':
## 
##     expand
# Open the ggtree vignettes only in an interactive session; in a
# non-interactive knit this call just starts the local help server.
if (interactive()) {
  browseVignettes("ggtree")
}
library(ape)
## 
## Attaching package: 'ape'
## The following object is masked from 'package:ggtree':
## 
##     rotate
library(tidyverse)
library(ggtree)

# Read the course tree file and print its summary.
tree <- read.tree(file = "data/fastree_result (1).tre(final)")
tree
## 
## Phylogenetic tree with 21 tips and 19 internal nodes.
## 
## Tip labels:
##   Archaeoglobus_fulgidus, Trypanosoma_cruzi_nuclear, Amphidinium_carterae, Saccharomyces_cerevisiae_nuclear, Homo_sapies_nuclear, Drosophila_yakuba_nuclear, ...
## Node labels:
##   , 0.998, 0.942, 1.000, 0.994, 0.974, ...
## 
## Unrooted; includes branch lengths.
# Long form: a ggplot object plus a tree geom and the tree theme.
ggplot(tree) +
  geom_tree() +
  theme_tree()

# ggtree() is the shorthand for the call above.
ggtree(tree)

# Same plot with a scale bar.
ggtree(tree) +
  geom_treescale()

# Alternatively, show the scale along the whole x axis.
ggtree(tree) +
  theme_tree2()

# Ignore branch lengths when laying out the tree.
ggtree(tree, branch.length = "none")

# As above, with custom line colour, width and type.
ggtree(
  tree,
  branch.length = "none",
  color = "blue",
  size = 2,
  linetype = 3
)

### Exercise 1

# Alternative layouts for the same tree.
ggtree(tree, layout = "slanted")

ggtree(tree, layout = "circular")

# Thick red tree drawn without branch lengths. The argument name is
# `branch.length`; `branch.tree` is not a ggtree parameter and is ignored
# with a warning, which leaves the branch lengths in the plot.
# NOTE(review): linetype = 19 is outside the usual 0-6 line-type codes
# (19 is normally a point-shape code) - confirm the intended line type.
ggtree(tree, branch.length = "none", color = "red", size = 2, linetype = 19)

# Keep the bare tree in `p` so that each layer below is added to the same base.
p <- ggtree(tree)

# Points on the internal nodes.
p +
  geom_nodepoint()

# Points on the tips.
p +
  geom_tippoint()

# Text labels on the tips.
p +
  geom_tiplab()

Exercise 2

# Exercise 2: start again from the plain tree.
p <- ggtree(tree)
p + ggtitle("Exercise 2 phylogeny changing nodes and tips")

p + geom_nodepoint()

p + geom_tippoint()

p + geom_tiplab()

# Purple diamonds on every node, with yellow circles layered on the internal
# nodes. geom_point() has no `subset` aesthetic (ggplot2 ignores it with a
# warning) and `node == all()` only compares against a bare TRUE, so no
# subset mapping is passed; the points drawn are the same.
p +
  geom_point(shape = 23, size = 5, fill = "purple") +
  geom_nodepoint(shape = 21, fill = "yellow")

# Print each node's number next to it, to look up the ids used below.
ggtree(tree) +
  geom_text(aes(label = node), hjust = -0.3)

# Tip labels only.
ggtree(tree) +
  geom_tiplab()

# Label a single clade.
# NOTE(review): the tree summary reports 21 tips, so nodes 17 and 21 used in
# this section are presumably tips rather than internal clades - confirm the
# intended node numbers against the node-number plot above.
ggtree(tree) +
  geom_cladelabel(
    node = 17,
    label = "Some random clade",
    color = "red"
  )

# Same label, shifted right so it clears the tip labels.
ggtree(tree) +
  geom_tiplab() +
  geom_cladelabel(
    node = 17,
    label = "Some random clade",
    color = "red2",
    offset = 0.8
  )

# Two clade labels on one plot.
ggtree(tree) +
  geom_tiplab() +
  geom_cladelabel(
    node = 17,
    label = "Some random clade",
    color = "red2",
    offset = 0.8
  ) +
  geom_cladelabel(
    node = 21,
    label = "A different clade",
    color = "blue",
    offset = 0.8
  )

# Aligned clade labels with a widened x range.
# NOTE(review): theme_tree() is applied after theme_tree2(); confirm which of
# the two themes is meant to take effect here.
ggtree(tree) +
  geom_tiplab() +
  geom_cladelabel(
    node = 17,
    label = "Some random clade",
    color = "red2",
    offset = 0.8,
    align = TRUE
  ) +
  geom_cladelabel(
    node = 21,
    label = "A different clade",
    color = "blue",
    offset = 0.8,
    align = TRUE
  ) +
  theme_tree2() +
  xlim(0, 70) +
  theme_tree()

# Shade two clades, identified by node number.
ggtree(tree) +
  geom_tiplab() +
  geom_hilight(node = 30, fill = "gold", extend = 0.5) +
  geom_hilight(node = 37, fill = "purple", extend = 0.5)

# Draw curved links between pairs of taxa, identified by tip label.
# The labels must exist in the tree being plotted; the single-letter names
# "E", "H", "C" and "G" do not appear among the tip labels printed for this
# tree, so labels that do appear there are used instead.
# NOTE(review): only the first six tip labels are visible in the printed
# summary - swap in whichever taxa the links are really meant to connect.
# "Homo_sapies_nuclear" is spelled exactly as it appears in the tree file.
ggtree(tree) +
  geom_tiplab() +
  geom_taxalink(
    "Homo_sapies_nuclear",
    "Drosophila_yakuba_nuclear",
    color = "blue3"
  ) +
  geom_taxalink(
    "Amphidinium_carterae",
    "Saccharomyces_cerevisiae_nuclear",
    color = "orange2",
    curvature = -0.9
  )

library(treeio)
## treeio v1.12.0  For help: https://yulab-smu.github.io/treedata-book/
## 
## If you use treeio in published research, please cite:
## 
## LG Wang, TTY Lam, S Xu, Z Dai, L Zhou, T Feng, P Guo, CW Dunn, BR Jones, T Bradley, H Zhu, Y Guan, Y Jiang, G Yu. treeio: an R package for phylogenetic tree input and output with richly annotated and associated data. Molecular Biology and Evolution 2020, 37(2):599-603. doi: 10.1093/molbev/msz240
## 
## Attaching package: 'treeio'
## The following object is masked from 'package:ape':
## 
##     drop.tip
library(ggimage)
# Load the BEAST tree; this reassigns `tree`.
tree <- read.beast(file = "data/flu_tree_beast.tree")

# Passing the most recent sampling date puts dates on the x axis;
# theme_tree2() adds the axis itself.
ggtree(tree, mrsd = "2013-01-01") +
  theme_tree2()

# Same plot with aligned tip labels and a wider date range.
ggtree(tree, mrsd = "2013-01-01") +
  theme_tree2() +
  geom_tiplab(align = TRUE, linesize = 0.5) +
  xlim(1990, 2020)

# Tree next to a window (columns 150-175) of the sequence alignment.
msaplot(
  p = ggtree(tree),
  fasta = "data/flu_aasequence.fasta",
  window = c(150, 175)
)

# Fixed seed so the random trees are identical on every knit.
set.seed(42)
# Twelve random trees: sizes 10, 25, 50 and 100 tips, each repeated 3 times.
trees <- lapply(rep(c(10, 25, 50, 100), 3), rtree)
class(trees) <- "multiPhylo"
# One facet per tree. The facet_wrap() argument is `scales`; it is spelled
# out in full instead of relying on partial matching of `scale`.
ggtree(trees) +
  facet_wrap(~.id, scales = "free", ncol = 4) +
  ggtitle("Many trees. Such phylogenetics. Wow.")

# Random 30-tip tree and its base plot.
tree <- rtree(30)
p <- ggtree(tree)

# Panel "dot": one random value per tip label, drawn as red points.
d1 <- data.frame(
  id = tree$tip.label,
  val = rnorm(30, sd = 3)
)
p2 <- facet_plot(
  p,
  panel = "dot",
  data = d1,
  geom = geom_point,
  aes(x = val),
  color = "red3"
)

# Panel "bar": a second random value per tip, drawn as a blue segment
# running from 0 to the value.
d2 <- data.frame(
  id = tree$tip.label,
  value = abs(rnorm(30, mean = 100, sd = 50))
)
p3 <- facet_plot(
  p2,
  panel = "bar",
  data = d2,
  geom = geom_segment,
  aes(x = 0, xend = value, y = y, yend = y),
  size = 3,
  color = "blue4"
)

# Display the tree and both panels with an x-axis scale.
p3 + theme_tree2()

library(magick)
## Linking to ImageMagick 6.9.11.32
## Enabled features: cairo, fontconfig, freetype, lcms, pango, rsvg, webp
## Disabled features: fftw, ghostscript, x11
# Tree with PhyloPic silhouettes at the tips.

# The tree is given inline as a text string rather than read from a file.
newick <- "((Pongo_abelii,(Gorilla_gorilla_gorilla,(Pan_paniscus,Pan_troglodytes)Pan,Homo_sapiens)Homininae)Hominidae,Nomascus_leucogenys)Hominoidea;"
tree <- read.tree(text = newick)

# Look up an image uid for each tip label, then add one body-mass value
# per tip.
# NOTE(review): phylopic_uid() presumably needs network access - confirm
# before knitting offline.
d <- ggimage::phylopic_uid(tree$tip.label)
d$body_mass <- c(52, 114, 47, 45, 58, 6)

# Attach `d` to the tree plot, draw the silhouettes coloured by body mass,
# and put the text labels beside them.
p <- ggtree(tree) %<+% d +
  geom_tiplab(
    aes(image = uid, colour = body_mass),
    geom = "phylopic",
    offset = 2.5
  ) +
  geom_tiplab(aes(label = label), offset = 0.2) +
  xlim(NA, 7) +
  scale_color_viridis_c()
p

Exercise 4

library(tidyverse)
library(ggtree)

# Exercise 4: reload the course tree and print its summary.
tree <- read.tree(file = "data/fastree_result (1).tre(final)")
tree
## 
## Phylogenetic tree with 21 tips and 19 internal nodes.
## 
## Tip labels:
##   Archaeoglobus_fulgidus, Trypanosoma_cruzi_nuclear, Amphidinium_carterae, Saccharomyces_cerevisiae_nuclear, Homo_sapies_nuclear, Drosophila_yakuba_nuclear, ...
## Node labels:
##   , 0.998, 0.942, 1.000, 0.994, 0.974, ...
## 
## Unrooted; includes branch lengths.
# Base plot: branch lengths ignored, thick red lines with line type 3.
p <- ggtree(
  tree,
  branch.length = "none",
  color = "red",
  size = 2,
  linetype = 3
)

# Internal-node points, tip points and tip labels, each on its own plot.
p +
  geom_nodepoint()

p +
  geom_tippoint()

p +
  geom_tiplab()

# Tree with branch lengths, node and tip labels, and five highlighted nodes.
ggtree(tree) +
  geom_nodelab() +
  geom_tiplab(hjust = -0.1, size = 3) +
  geom_hilight(node = 30, fill = "gold", extend = 0.5) +
  geom_hilight(node = 37, fill = "purple", extend = 0.5) +
  geom_hilight(node = 25, fill = "blue", extend = 0.5) +
  geom_hilight(node = 16, fill = "purple", extend = 0.75) +
  geom_hilight(node = 1, fill = "purple", extend = 0.5)